This is a statistical analysis of the prevalence of TST reactivity among household contacts of index TB cases in the HomeACF Study, a cluster randomised trial done in two provinces of South Africa.
This documents reports revised analysis following reviewer’s comments received on 25th Februrary 2020.
Load all required packages for analysis.
library(tidyverse) #for data manipulation
library(pmthemes) #for ggplot themes
library(knitr) #for tables
library(arsenal) #for summary table
library(brms) #for regression modelling
library(tidybayes) #for processing bayesian estimates
library(Hmisc) #for multiple imputation
library(gt) #for tables
library(janitor) #for cleaning
library(cowplot) #for combining graphs
library(patchwork) #for combining graphs
library(here) #for file system organisation
library(glue) #for string manipulationImport data required for the analysis.
Now do some tidying up of data.
Here we do multiple imputation using a flexible additive regression model with bootstrapping.
mdata <- tstsa %>%
dplyr::select(record_id, contact_id, tstdiam_h,
sex_h, ageyears_h, hivfinal_h,timespent_h,
sharebedroom_h, site, ageyears_i, sex_i,
smoke_i, hiv_i, microconf_i, num_contacts,
smoke_h, totalrooms_h, windows_h) %>%
mutate(hiv_i = case_when(hiv_i=="c) HIV-unknown" ~ NA_character_,
TRUE ~ hiv_i)) %>%
mutate(hivfinal_h = case_when(hivfinal_h=="c) HIV unknown" ~ NA_character_,
TRUE ~ hivfinal_h))
imp1 <- aregImpute(formula = ~ sex_h + ageyears_h + hivfinal_h +timespent_h +
sharebedroom_h + site + ageyears_i + sex_i +
smoke_i + hiv_i + microconf_i + num_contacts + totalrooms_h + windows_h,
data = mdata, n.impute=5, burnin = 3, nk=3, type="pmm",
pmmtype = 1)## Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
##
## Multiple Imputation using Bootstrap and PMM
##
## aregImpute(formula = ~sex_h + ageyears_h + hivfinal_h + timespent_h +
## sharebedroom_h + site + ageyears_i + sex_i + smoke_i + hiv_i +
## microconf_i + num_contacts + totalrooms_h + windows_h, data = mdata,
## n.impute = 5, nk = 3, type = "pmm", pmmtype = 1, burnin = 3)
##
## n: 2985 p: 14 Imputations: 5 nk: 3
##
## Number of NAs:
## sex_h ageyears_h hivfinal_h timespent_h sharebedroom_h
## 0 0 151 3 0
## site ageyears_i sex_i smoke_i hiv_i
## 0 0 0 0 145
## microconf_i num_contacts totalrooms_h windows_h
## 0 0 203 9
##
## type d.f.
## sex_h c 1
## ageyears_h s 2
## hivfinal_h c 1
## timespent_h c 2
## sharebedroom_h c 1
## site c 1
## ageyears_i s 2
## sex_i c 1
## smoke_i c 2
## hiv_i c 1
## microconf_i c 1
## num_contacts s 2
## totalrooms_h s 2
## windows_h s 1
##
## Transformation of Target Variables Forced to be Linear
##
## R-squares for Predicting Non-Missing Values for Each Variable
## Using Last Imputations of Predictors
## hivfinal_h timespent_h hiv_i totalrooms_h windows_h
## 0.148 0.074 0.265 0.750 0.775
mdata_imp <- as.data.frame(impute.transcan(imp1, imputation=1, data=mdata, list.out=TRUE, pr=FALSE, check=FALSE))
mdata_imp %>% janitor::tabyl(hiv_i) %>% gt()| hiv_i | n | percent |
|---|---|---|
| a) HIV-negative | 1335 | 0.4472362 |
| b) HIV-positive | 1650 | 0.5527638 |
| hivfinal_h | n | percent |
|---|---|---|
| a) HIV negative | 2608 | 0.8737018 |
| b) HIV positive | 377 | 0.1262982 |
| timespent_h | n | percent |
|---|---|---|
| a) every now and again | 364 | 0.1219430 |
| b) part of the day | 1403 | 0.4700168 |
| c) most of the day | 1218 | 0.4080402 |
mdata_imp <- mdata %>%
dplyr::select(record_id, contact_id, tstdiam_h,smoke_h) %>%
bind_cols(mdata_imp)
mdata_imp <- mdata_imp %>%
ungroup() %>%
haven::zap_labels()
mdata_imp <- mdata_imp %>%
mutate(hivfinal_h = as.factor(as.vector(hivfinal_h)),
timespent_h = as.factor(as.vector(timespent_h)),
hiv_i = as.factor(as.vector(hiv_i)),
record_id = as.character(record_id),
contact_id = as.character(contact_id),
totalrooms_h = as.numeric(totalrooms_h))
glimpse(mdata_imp)## Observations: 2,985
## Variables: 18
## $ record_id <chr> "1-00-0101", "1-00-0101", "1-00-0101", "1-00-0101", "1…
## $ contact_id <chr> "1-00-0101-02", "1-00-0101-03", "1-00-0101-05", "1-00-…
## $ tstdiam_h <dbl> 0, 0, 0, 0, 0, NA, NA, 0, 5, 20, NA, NA, 7, 5, 0, 13, …
## $ smoke_h <chr> "a) never smoked", "c) previous smoker", "a) never smo…
## $ sex_h <fct> Female, Male, Male, Female, Male, Male, Female, Male, …
## $ ageyears_h <dbl> 35, 59, 13, 55, 34, 7, 37, 8, 41, 22, 28, 41, 14, 76, …
## $ hivfinal_h <fct> b) HIV positive, a) HIV negative, a) HIV negative, a) …
## $ timespent_h <fct> c) most of the day, b) part of the day, c) most of the…
## $ sharebedroom_h <fct> Yes, No, No, No, No, No, No, No, No, No, No, No, No, N…
## $ site <fct> Mangaung, Mangaung, Mangaung, Mangaung, Mangaung, Mang…
## $ ageyears_i <dbl> 15, 15, 15, 15, 15, 15, 49, 49, 49, 49, 49, 49, 49, 49…
## $ sex_i <fct> Female, Female, Female, Female, Female, Female, Male, …
## $ smoke_i <fct> a) never smoked, a) never smoked, a) never smoked, a) …
## $ hiv_i <fct> b) HIV-positive, b) HIV-positive, b) HIV-positive, b) …
## $ microconf_i <fct> b) Micro-confirmed TB, b) Micro-confirmed TB, b) Micro…
## $ num_contacts <int> 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 3, 3, 3, 2, …
## $ totalrooms_h <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, …
## $ windows_h <dbl> 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 10, 10, 10, …
Make a table of baseline characteristics by study site.
First individual-level characteristics.
table1a <- tableby(site ~ sex_i + ageyears_i + bmi_i +
dead_i + xpert_i + smear_i + culture_i +
microconf_i + smoke_i + hiv_i +
coughdays_i,
data = index2, test=TRUE,
numeric.stats=c("medianq1q3"), digits=3)
summary(table1a)| Capricorn (N=407) | Mangaung (N=517) | Total (N=924) | p value | |
|---|---|---|---|---|
| sex_i | 0.073 | |||
| Â Â Â Female | 174 (42.8%) | 191 (36.9%) | 365 (39.5%) | |
| Â Â Â Male | 233 (57.2%) | 326 (63.1%) | 559 (60.5%) | |
| ageyears_i | 0.784 | |||
| Â Â Â Median (Q1, Q3) | 37.000 (29.000, 47.000) | 37.000 (28.000, 48.000) | 37.000 (28.000, 47.250) | |
| bmi_i | 0.288 | |||
| Â Â Â Median (Q1, Q3) | 19.100 (17.100, 22.100) | 18.700 (16.700, 21.600) | 18.800 (16.800, 21.825) | |
| dead_i | 0.407 | |||
| Â Â Â a) Not deceased | 393 (96.6%) | 504 (97.5%) | 897 (97.1%) | |
| Â Â Â b) Deceased | 14 (3.4%) | 13 (2.5%) | 27 (2.9%) | |
| xpert_i | < 0.001 | |||
| Â Â Â a) Xpert negative | 6 (1.5%) | 1 (0.2%) | 7 (0.8%) | |
| Â Â Â b) Xpert positive | 348 (85.5%) | 508 (98.3%) | 856 (92.6%) | |
| Â Â Â c) Xpert not done | 53 (13.0%) | 8 (1.5%) | 61 (6.6%) | |
| smear_i | < 0.001 | |||
| Â Â Â a) Smear negative | 22 (5.4%) | 2 (0.4%) | 24 (2.6%) | |
| Â Â Â b) Smear positive | 126 (31.0%) | 8 (1.5%) | 134 (14.5%) | |
| Â Â Â c) Smear not done | 259 (63.6%) | 507 (98.1%) | 766 (82.9%) | |
| culture_i | < 0.001 | |||
| Â Â Â a) Culture negative | 1 (0.2%) | 2 (0.4%) | 3 (0.3%) | |
| Â Â Â b) Culture positive | 32 (7.9%) | 0 (0.0%) | 32 (3.5%) | |
| Â Â Â c) Culture not done | 374 (91.9%) | 515 (99.6%) | 889 (96.2%) | |
| microconf_i | 0.061 | |||
| Â Â Â a) Not micro-confirmed TB | 14 (3.4%) | 8 (1.5%) | 22 (2.4%) | |
| Â Â Â b) Micro-confirmed TB | 393 (96.6%) | 509 (98.5%) | 902 (97.6%) | |
| smoke_i | < 0.001 | |||
| Â Â Â a) never smoked | 267 (65.6%) | 253 (48.9%) | 520 (56.3%) | |
| Â Â Â b) current smoker | 49 (12.0%) | 127 (24.6%) | 176 (19.0%) | |
| Â Â Â c) previous smoker | 91 (22.4%) | 137 (26.5%) | 228 (24.7%) | |
| hiv_i | 0.021 | |||
| Â Â Â a) HIV-negative | 178 (43.7%) | 207 (40.0%) | 385 (41.7%) | |
| Â Â Â b) HIV-positive | 219 (53.8%) | 278 (53.8%) | 497 (53.8%) | |
| Â Â Â c) HIV-unknown | 10 (2.5%) | 32 (6.2%) | 42 (4.5%) | |
| coughdays_i | 0.782 | |||
| Â Â Â Median (Q1, Q3) | 30.000 (0.000, 60.000) | 30.000 (9.500, 60.000) | 30.000 (7.000, 60.000) |
Now the household contact characteristics
table1b <- tableby(site ~ sex_h + ageyears_h +
airspace_h + timespent_h + sleepsamebed_h + sharebedroom_h +
smoke_h +
hivfinal_h,
data=tstsa, test=TRUE,
numeric.stats=c("medianrange"), digits=1)
summary(table1b)| Capricorn (N=1298) | Mangaung (N=1687) | Total (N=2985) | p value | |
|---|---|---|---|---|
| sex_h | 0.964 | |||
| Â Â Â Female | 803 (61.9%) | 1045 (61.9%) | 1848 (61.9%) | |
| Â Â Â Male | 495 (38.1%) | 642 (38.1%) | 1137 (38.1%) | |
| ageyears_h | 0.075 | |||
| Â Â Â Median (Range) | 17.0 (0.0, 98.0) | 16.0 (0.0, 95.0) | 17.0 (0.0, 98.0) | |
| airspace_h | 0.381 | |||
| Â Â Â N-Miss | 1 | 0 | 1 | |
| Â Â Â No | 0 (0.0%) | 1 (0.1%) | 1 (0.0%) | |
| Â Â Â Yes | 1297 (100.0%) | 1686 (99.9%) | 2983 (100.0%) | |
| timespent_h | < 0.001 | |||
| Â Â Â N-Miss | 2 | 1 | 3 | |
| Â Â Â a) every now and again | 241 (18.6%) | 123 (7.3%) | 364 (12.2%) | |
| Â Â Â b) part of the day | 628 (48.5%) | 773 (45.8%) | 1401 (47.0%) | |
| Â Â Â c) most of the day | 427 (32.9%) | 790 (46.9%) | 1217 (40.8%) | |
| sleepsamebed_h | < 0.001 | |||
| Â Â Â No | 1263 (97.3%) | 1686 (99.9%) | 2949 (98.8%) | |
| Â Â Â Yes | 35 (2.7%) | 1 (0.1%) | 36 (1.2%) | |
| sharebedroom_h | < 0.001 | |||
| Â Â Â No | 1184 (91.2%) | 1303 (77.2%) | 2487 (83.3%) | |
| Â Â Â Yes | 114 (8.8%) | 384 (22.8%) | 498 (16.7%) | |
| smoke_h | 0.004 | |||
| Â Â Â a) never smoked | 1200 (92.4%) | 1498 (88.8%) | 2698 (90.4%) | |
| Â Â Â b) current smoker | 83 (6.4%) | 159 (9.4%) | 242 (8.1%) | |
| Â Â Â c) previous smoker | 15 (1.2%) | 30 (1.8%) | 45 (1.5%) | |
| hivfinal_h | < 0.001 | |||
| Â Â Â N-Miss | 3 | 0 | 3 | |
| Â Â Â a) HIV negative | 1136 (87.7%) | 1349 (80.0%) | 2485 (83.3%) | |
| Â Â Â b) HIV positive | 126 (9.7%) | 223 (13.2%) | 349 (11.7%) | |
| Â Â Â c) HIV unknown | 33 (2.5%) | 115 (6.8%) | 148 (5.0%) |
Now the dwelling characteristics
dwellings <- tstsa %>%
select(record_id, site, num_contacts, totalrooms_h, windows_h,
numsmokers_h) %>%
distinct()
table1c <- tableby(site ~ num_contacts + totalrooms_h + windows_h + numsmokers_h,
data=dwellings, test=TRUE,
numeric.stats=c("medianrange"), digits=1)
summary(table1c)| Capricorn (N=407) | Mangaung (N=517) | Total (N=924) | p value | |
|---|---|---|---|---|
| num_contacts | 0.602 | |||
| Â Â Â Median (Range) | 3.0 (1.0, 14.0) | 3.0 (1.0, 14.0) | 3.0 (1.0, 14.0) | |
| G09. Total rooms in household | < 0.001 | |||
| Â Â Â Median (Range) | 6.0 (1.0, 22.0) | 4.0 (1.0, 10.0) | 5.0 (1.0, 22.0) | |
| G10. How many windows are there in the household? | < 0.001 | |||
| Â Â Â Median (Range) | 8.0 (0.0, 27.0) | 4.0 (0.0, 15.0) | 5.0 (0.0, 27.0) | |
| **G15. How many people smoke tobacco inside the household? __ __ __** | 0.068 | |||
| Â Â Â Median (Range) | 0.0 (0.0, 6.0) | 0.0 (0.0, 4.0) | 0.0 (0.0, 6.0) |
Number of contacts per index case
tstsa %>%
group_by(record_id, site) %>%
count() %>%
group_by(n, site) %>%
summarise(contacts = sum(n)) %>%
ggplot() +
geom_bar(aes(x=n, y=contacts, fill=site), stat="identity") +
facet_wrap(~site) +
theme_bw(base_family = "Oswald") +
theme(legend.position = "none") +
labs(x="Number of household contacts per index case",
y = "N")Graph by age-group of proportion with TST >=5 mm, stratified by key characteristics
agegps <- mdata_imp %>%
mutate(agegroup_h = case_when(ageyears_h <5 ~ "0-4",
ageyears_h >=5 & ageyears_h <10 ~ "05-09",
ageyears_h >=10 & ageyears_h <15 ~ "10-14",
ageyears_h >=15 & ageyears_h <25 ~ "15-24",
ageyears_h >=25 & ageyears_h <35 ~ "25-34",
ageyears_h >=35 & ageyears_h <45 ~ "35-44",
ageyears_h >=45 & ageyears_h <55 ~ "45-54",
ageyears_h >=55 ~ "55+"),
ordered=TRUE) %>%
mutate(tst_5mm = case_when(tstdiam_h<5 ~ 0,
tstdiam_h>=5 ~ 1)) %>%
mutate(tst_10mm = case_when(tstdiam_h<10 ~ 0,
tstdiam_h>=10 ~ 1))
agegps %>%
janitor::tabyl(agegroup_h, site) %>%
adorn_totals("col") %>%
adorn_percentages("col") %>%
adorn_pct_formatting() %>%
adorn_ns() %>%
gt()| agegroup_h | Capricorn | Mangaung | Total |
|---|---|---|---|
| 0-4 | 15.1% (196) | 14.8% (249) | 14.9% (445) |
| 05-09 | 15.9% (206) | 16.2% (274) | 16.1% (480) |
| 10-14 | 13.4% (174) | 15.8% (267) | 14.8% (441) |
| 15-24 | 16.9% (219) | 15.8% (267) | 16.3% (486) |
| 25-34 | 10.7% (139) | 10.4% (176) | 10.6% (315) |
| 35-44 | 6.0% (78) | 7.9% (133) | 7.1% (211) |
| 45-54 | 6.5% (84) | 6.3% (106) | 6.4% (190) |
| 55+ | 15.6% (202) | 12.7% (215) | 14.0% (417) |
| tst_5mm | n | percent | valid_percent |
|---|---|---|---|
| 0 | 2267 | 0.75946399 | 0.8319266 |
| 1 | 458 | 0.15343384 | 0.1680734 |
| NA | 260 | 0.08710218 | NA |
| tst_10mm | n | percent | valid_percent |
|---|---|---|---|
| 0 | 2366 | 0.79262982 | 0.8682569 |
| 1 | 359 | 0.12026801 | 0.1317431 |
| NA | 260 | 0.08710218 | NA |
Look at the bins of TST diameter
tstsa %>%
group_by(tstdiam_h, site) %>%
summarise(n= n()) %>%
mutate(prop = n/sum(n)) %>%
filter(!is.na(tstdiam_h)) %>%
ggplot() +
geom_bar(aes(x=tstdiam_h, y=n), stat = "identity") +
theme_light() +
facet_grid(site~.) +
scale_y_continuous() +
labs(title="Tuberculin skin test reactivity in household contacts",
x = "Diameter (mm)",
y = "Percent")## Don't know how to automatically pick scale for object of type haven_labelled. Defaulting to continuous.
Proportions of household contacts with TST induration >=5mm
#Get the prevalence with TST induration >=5mm overall
g0 <- agegps %>%
group_by(agegroup_h, tst_5mm) %>%
summarise(n=n()) %>%
mutate(sum = sum(n)) %>%
filter(tst_5mm==1) %>%
rowwise %>%
mutate(xx = list(broom::tidy(binom.test(n, sum, conf.level=0.95)))) %>%
tidyr::unnest(xx)
g0 %>% gt()| agegroup_h | tst_5mm | n | sum | estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0-4 | 1 | 58 | 445 | 0.1303371 | 58 | 9.096183e-61 | 445 | 0.10047844 | 0.1652120 | Exact binomial test | two.sided |
| 05-09 | 1 | 62 | 480 | 0.1291667 | 62 | 6.697507e-66 | 480 | 0.10048393 | 0.1625090 | Exact binomial test | two.sided |
| 10-14 | 1 | 57 | 441 | 0.1292517 | 57 | 1.252760e-60 | 441 | 0.09938363 | 0.1641981 | Exact binomial test | two.sided |
| 15-24 | 1 | 70 | 486 | 0.1440329 | 70 | 6.267708e-61 | 486 | 0.11403254 | 0.1784416 | Exact binomial test | two.sided |
| 25-34 | 1 | 57 | 315 | 0.1809524 | 57 | 1.082772e-31 | 315 | 0.14002836 | 0.2279948 | Exact binomial test | two.sided |
| 35-44 | 1 | 41 | 211 | 0.1943128 | 41 | 7.330713e-20 | 211 | 0.14319869 | 0.2542321 | Exact binomial test | two.sided |
| 45-54 | 1 | 36 | 190 | 0.1894737 | 36 | 1.393906e-18 | 190 | 0.13637455 | 0.2525315 | Exact binomial test | two.sided |
| 55+ | 1 | 77 | 417 | 0.1846523 | 77 | 1.650878e-40 | 417 | 0.14856227 | 0.2252873 | Exact binomial test | two.sided |
#plot
g0 %>%
ggplot() +
geom_bar(aes(x=agegroup_h, y=estimate), fill = "firebrick", stat = "identity") +
geom_errorbar(aes(x=agegroup_h, ymin=conf.low, ymax = conf.high), width = 0.2) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme_bw(base_family="Oswald") +
labs(y="",
x="") +
theme(legend.position = "none") +
theme(axis.text.x=element_text(angle=45, hjust=1))#Get the propotion with TST induraction >=5mm by site
g0_b <- agegps %>%
group_by(agegroup_h, tst_5mm, site) %>%
summarise(n=n()) %>%
mutate(sum = sum(n)) %>%
filter(tst_5mm==1) %>%
rowwise %>%
mutate(xx = list(broom::tidy(binom.test(n, sum, conf.level=0.95)))) %>%
tidyr::unnest(xx)
g0_b %>% gt()| agegroup_h | tst_5mm | site | n | sum | estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0-4 | 1 | Capricorn | 13 | 58 | 0.2241379 | 13 | 3.011591e-05 | 58 | 0.1250892 | 0.3527239 | Exact binomial test | two.sided |
| 0-4 | 1 | Mangaung | 45 | 58 | 0.7758621 | 45 | 3.011591e-05 | 58 | 0.6472761 | 0.8749108 | Exact binomial test | two.sided |
| 05-09 | 1 | Capricorn | 14 | 62 | 0.2258065 | 14 | 1.742852e-05 | 62 | 0.1293039 | 0.3497369 | Exact binomial test | two.sided |
| 05-09 | 1 | Mangaung | 48 | 62 | 0.7741935 | 48 | 1.742852e-05 | 62 | 0.6502631 | 0.8706961 | Exact binomial test | two.sided |
| 10-14 | 1 | Capricorn | 19 | 57 | 0.3333333 | 19 | 1.634810e-02 | 57 | 0.2140053 | 0.4706476 | Exact binomial test | two.sided |
| 10-14 | 1 | Mangaung | 38 | 57 | 0.6666667 | 38 | 1.634810e-02 | 57 | 0.5293524 | 0.7859947 | Exact binomial test | two.sided |
| 15-24 | 1 | Capricorn | 18 | 70 | 0.2571429 | 18 | 5.849547e-05 | 70 | 0.1600699 | 0.3756144 | Exact binomial test | two.sided |
| 15-24 | 1 | Mangaung | 52 | 70 | 0.7428571 | 52 | 5.849547e-05 | 70 | 0.6243856 | 0.8399301 | Exact binomial test | two.sided |
| 25-34 | 1 | Capricorn | 15 | 57 | 0.2631579 | 15 | 4.600050e-04 | 57 | 0.1553848 | 0.3966275 | Exact binomial test | two.sided |
| 25-34 | 1 | Mangaung | 42 | 57 | 0.7368421 | 42 | 4.600050e-04 | 57 | 0.6033725 | 0.8446152 | Exact binomial test | two.sided |
| 35-44 | 1 | Capricorn | 15 | 41 | 0.3658537 | 15 | 1.172752e-01 | 41 | 0.2212279 | 0.5306375 | Exact binomial test | two.sided |
| 35-44 | 1 | Mangaung | 26 | 41 | 0.6341463 | 26 | 1.172752e-01 | 41 | 0.4693625 | 0.7787721 | Exact binomial test | two.sided |
| 45-54 | 1 | Capricorn | 15 | 36 | 0.4166667 | 15 | 4.050322e-01 | 36 | 0.2551410 | 0.5924348 | Exact binomial test | two.sided |
| 45-54 | 1 | Mangaung | 21 | 36 | 0.5833333 | 21 | 4.050322e-01 | 36 | 0.4075652 | 0.7448590 | Exact binomial test | two.sided |
| 55+ | 1 | Capricorn | 24 | 77 | 0.3116883 | 24 | 1.263227e-03 | 77 | 0.2109468 | 0.4274275 | Exact binomial test | two.sided |
| 55+ | 1 | Mangaung | 53 | 77 | 0.6883117 | 53 | 1.263227e-03 | 77 | 0.5725725 | 0.7890532 | Exact binomial test | two.sided |
Graph with two cut-offs for TST diameter
agegps <- mdata_imp %>%
mutate(agegroup_h = case_when(ageyears_h <5 ~ "0-4",
ageyears_h >=5 & ageyears_h <10 ~ "05-09",
ageyears_h >=10 & ageyears_h <15 ~ "10-14",
ageyears_h >=15 & ageyears_h <25 ~ "15-24",
ageyears_h >=25 & ageyears_h <35 ~ "25-34",
ageyears_h >=35 & ageyears_h <45 ~ "35-44",
ageyears_h >=45 & ageyears_h <55 ~ "45-54",
ageyears_h >=55 ~ "55+"),
ordered=TRUE) %>%
mutate(tst_5mm = case_when(tstdiam_h<5 ~ 0,
tstdiam_h>=5 ~ 1)) %>%
mutate(tst_10mm = case_when(tstdiam_h<10 ~ 0,
tstdiam_h>=10 ~ 1)) %>%
mutate(hiv_i = case_when(hiv_i== "a) HIV-negative" ~ "Index HIV -ve",
hiv_i== "b) HIV-positive" ~ "Index HIV +ve")) %>%
mutate(hivfinal_h = case_when(hivfinal_h == "a) HIV negative" ~ "Contact HIV -ve",
hivfinal_h == "b) HIV positive" ~ "Contact HIV +ve")) %>%
mutate(timespent_h = case_when(timespent_h == "a) every now and again" ~ "Contact rarely",
timespent_h == "b) part of the day" ~ "Contact part of day",
timespent_h == "c) most of the day" ~ "Contact most of day")) %>%
mutate(sex_i = case_when(sex_i == "Male" ~ "Index male",
sex_i == "Female" ~ "Index female")) %>%
mutate(sex_h = case_when(sex_h == "Male" ~ "Contact male",
sex_h == "Female" ~ "Contact female"))
# Make some tables of the outcome variables
agegps %>%
janitor::tabyl(tst_5mm) %>%
gt()| tst_5mm | n | percent | valid_percent |
|---|---|---|---|
| 0 | 2267 | 0.75946399 | 0.8319266 |
| 1 | 458 | 0.15343384 | 0.1680734 |
| NA | 260 | 0.08710218 | NA |
| tst_10mm | n | percent | valid_percent |
|---|---|---|---|
| 0 | 2366 | 0.79262982 | 0.8682569 |
| 1 | 359 | 0.12026801 | 0.1317431 |
| NA | 260 | 0.08710218 | NA |
#Now cross tab between agegroup and sex
agegps %>%
janitor::tabyl(tst_5mm, agegroup_h, site, show_na = FALSE) %>%
janitor::adorn_percentages("col") ## $Capricorn
## tst_5mm 0-4 05-09 10-14 15-24 25-34 35-44
## 0 0.92397661 0.92893401 0.8908046 0.91705069 0.887218 0.7945205
## 1 0.07602339 0.07106599 0.1091954 0.08294931 0.112782 0.2054795
## 45-54 55+
## 0.8148148 0.8787879
## 0.1851852 0.1212121
##
## $Mangaung
## tst_5mm 0-4 05-09 10-14 15-24 25-34 35-44 45-54
## 0 0.7804878 0.8181818 0.8473896 0.776824 0.7042254 0.74 0.7789474
## 1 0.2195122 0.1818182 0.1526104 0.223176 0.2957746 0.26 0.2210526
## 55+
## 0.7253886
## 0.2746114
agegps %>%
filter(!is.na(tstdiam_h)) %>%
janitor::tabyl(tst_10mm, agegroup_h, site, show_na = FALSE) %>%
janitor::adorn_percentages("col") ## $Capricorn
## tst_10mm 0-4 05-09 10-14 15-24 25-34 35-44
## 0 0.97660819 0.96446701 0.93678161 0.94470046 0.8947368 0.890411
## 1 0.02339181 0.03553299 0.06321839 0.05529954 0.1052632 0.109589
## 45-54 55+
## 0.8395062 0.93434343
## 0.1604938 0.06565657
##
## $Mangaung
## tst_10mm 0-4 05-09 10-14 15-24 25-34 35-44 45-54
## 0 0.795122 0.8484848 0.8835341 0.806867 0.7464789 0.77 0.8105263
## 1 0.204878 0.1515152 0.1164659 0.193133 0.2535211 0.23 0.1894737
## 55+
## 0.7720207
## 0.2279793
#write a function to automate these
gdata <- function(data, groupvar, depvar) {
data %>%
ungroup() %>%
select(agegroup_h, {{depvar}}, .data[[groupvar]]) %>%
filter(!is.na({{depvar}})) %>%
group_by(agegroup_h, .data[[groupvar]], {{depvar}}) %>%
summarise(n=n()) %>%
mutate(sum = sum(n)) %>%
filter({{depvar}}==1) %>%
rowwise %>%
mutate(xx = list(broom::tidy(binom.test(n, sum, conf.level=0.95)))) %>%
tidyr::unnest(xx) %>%
select(agegroup_h, var = .data[[groupvar]], n, sum, estimate, conf.low, conf.high)
}
#select key variables for exploratory plotting
plot_vars <- agegps %>%
ungroup() %>%
select(site, sex_i, sex_h, hiv_i, hivfinal_h, timespent_h) %>%
names()
five_mm <- map(plot_vars, ~gdata(data = agegps, groupvar = .x, depvar = tst_5mm))
five_mm <- map(five_mm, ~mutate(., tst=">=5mm"))
ten_mm <- map(plot_vars, ~gdata(data = agegps, groupvar = .x, depvar = tst_10mm))
ten_mm <- map(ten_mm, ~mutate(., tst=">=10mm"))
gdata_out <- bind_rows(five_mm, ten_mm)## Warning in bind_rows_(x, .id): binding factor and character vector, coercing
## into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector, coercing
## into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector, coercing
## into character vector
gdata_out <- gdata_out %>%
mutate(var = fct_relevel(var,
"Mangaung", "Capricorn", "Index male", "Index female",
"Contact male", "Contact female",
"Index HIV +ve", "Index HIV -ve",
"Contact HIV +ve", "Contact HIV -ve",
"Contact rarely", "Contact part of day", "Contact most of day"))
ggplot(data = gdata_out) +
geom_errorbar(aes(x=agegroup_h, ymin=conf.low, ymax = conf.high), width = 0.2) +
geom_point(aes(x=agegroup_h, y=estimate, colour=tst)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1),
limits = c(0,0.80),
breaks = c(0,0.15,0.30,0.45,0.60,0.75)) +
facet_grid(fct_rev(tst) ~var) +
theme_bw(base_family="Oswald") +
labs(x="Age group of contact (years)",
y="Percentage with tuberculin skin test induration (95% CI)") +
theme(legend.position = "none") +
theme(axis.text.x=element_text(angle=90, hjust=1))Put this into tabular form
#write a function to automate these
tdata <- function(data, groupvar, depvar) {
data %>%
ungroup() %>%
select(site, {{depvar}}, .data[[groupvar]]) %>%
filter(!is.na({{depvar}})) %>%
group_by(site, .data[[groupvar]], {{depvar}}) %>%
summarise(n=n()) %>%
mutate(sum = sum(n)) %>%
filter({{depvar}}==1) %>%
rowwise %>%
mutate(xx = list(broom::tidy(binom.test(n, sum, conf.level=0.95)))) %>%
tidyr::unnest(xx) %>%
select(site, var = .data[[groupvar]], n, sum, estimate, conf.low, conf.high)
}
tab_vars <- agegps %>%
ungroup() %>%
select(sex_i, hiv_i, microconf_i, sex_h, hivfinal_h, timespent_h, sharebedroom_h, smoke_h) %>%
names()
five_mm2 <- map(tab_vars, ~tdata(data = agegps, groupvar = .x, depvar = tst_5mm))
five_mm2 <- map(five_mm2, ~mutate(., tst=">=5mm"))
ten_mm2 <- map(tab_vars, ~tdata(data = agegps, groupvar = .x, depvar = tst_10mm))
ten_mm2 <- map(ten_mm2, ~mutate(., tst=">=10mm"))
tdata_out <- bind_rows(five_mm2, ten_mm2)## Warning in bind_rows_(x, .id): binding character and factor vector, coercing
## into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector, coercing
## into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector, coercing
## into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector, coercing
## into character vector
tdata_out %>%
pivot_wider(names_from = site,
values_from = c(n, sum, estimate, conf.low, conf.high)) %>%
mutate(var = case_when(var =="No" ~ "Shares bedroom",
var =="Yes" ~ "Doesn't share bedroom",
TRUE ~ var)) %>%
mutate_at(vars(estimate_Capricorn:conf.high_Mangaung), scales::label_percent(accuracy = 0.1, trim=FALSE)) %>%
mutate(n_capricorn = glue("{n_Capricorn}/{sum_Capricorn} ({estimate_Capricorn}, {conf.low_Capricorn}-{conf.high_Capricorn})")) %>%
mutate(n_mangaung = glue("{n_Mangaung}/{sum_Mangaung} ({estimate_Mangaung}, {conf.low_Mangaung}-{conf.high_Mangaung})")) %>%
select(var, tst, n_capricorn, n_mangaung) %>%
gt()| var | tst | n_capricorn | n_mangaung |
|---|---|---|---|
| Index female | >=5mm | 64/540 (11.9%, 9.2%-14.9%) | 131/599 (21.9%, 18.6%-25.4%) |
| Index male | >=5mm | 69/704 ( 9.8%, 7.7%-12.2%) | 194/882 (22.0%, 19.3%-24.9%) |
| Index HIV -ve | >=5mm | 54/554 ( 9.7%, 7.4%-12.5%) | 161/649 (24.8%, 21.5%-28.3%) |
| Index HIV +ve | >=5mm | 79/690 (11.4%, 9.2%-14.1%) | 164/832 (19.7%, 17.1%-22.6%) |
| a) Not micro-confirmed TB | >=5mm | 3/51 ( 5.9%, 1.2%-16.2%) | 7/27 (25.9%, 11.1%-46.3%) |
| b) Micro-confirmed TB | >=5mm | 130/1193 (10.9%, 9.2%-12.8%) | 318/1454 (21.9%, 19.8%-24.1%) |
| Contact female | >=5mm | 81/769 (10.5%, 8.5%-12.9%) | 216/925 (23.4%, 20.7%-26.2%) |
| Contact male | >=5mm | 52/475 (10.9%, 8.3%-14.1%) | 109/556 (19.6%, 16.4%-23.2%) |
| Contact HIV -ve | >=5mm | 113/1129 (10.0%, 8.3%-11.9%) | 276/1285 (21.5%, 19.3%-23.8%) |
| Contact HIV +ve | >=5mm | 20/115 (17.4%, 11.0%-25.6%) | 49/196 (25.0%, 19.1%-31.7%) |
| Contact most of day | >=5mm | 43/405 (10.6%, 7.8%-14.0%) | 142/663 (21.4%, 18.4%-24.7%) |
| Contact part of day | >=5mm | 65/611 (10.6%, 8.3%-13.4%) | 154/701 (22.0%, 19.0%-25.2%) |
| Contact rarely | >=5mm | 25/228 (11.0%, 7.2%-15.8%) | 29/117 (24.8%, 17.3%-33.6%) |
| Shares bedroom | >=5mm | 112/1134 ( 9.9%, 8.2%-11.8%) | 244/1162 (21.0%, 18.7%-23.5%) |
| Doesn't share bedroom | >=5mm | 21/110 (19.1%, 12.2%-27.7%) | 81/319 (25.4%, 20.7%-30.5%) |
| a) never smoked | >=5mm | 115/1147 (10.0%, 8.3%-11.9%) | 287/1318 (21.8%, 19.6%-24.1%) |
| b) current smoker | >=5mm | 15/82 (18.3%, 10.6%-28.4%) | 33/134 (24.6%, 17.6%-32.8%) |
| c) previous smoker | >=5mm | 3/15 (20.0%, 4.3%-48.1%) | 5/29 (17.2%, 5.8%-35.8%) |
| Index female | >=10mm | 41/540 ( 7.6%, 5.5%-10.2%) | 109/599 (18.2%, 15.2%-21.5%) |
| Index male | >=10mm | 41/704 ( 5.8%, 4.2%- 7.8%) | 168/882 (19.0%, 16.5%-21.8%) |
| Index HIV -ve | >=10mm | 32/554 ( 5.8%, 4.0%- 8.1%) | 140/649 (21.6%, 18.5%-24.9%) |
| Index HIV +ve | >=10mm | 50/690 ( 7.2%, 5.4%- 9.4%) | 137/832 (16.5%, 14.0%-19.2%) |
| a) Not micro-confirmed TB | >=10mm | 2/51 ( 3.9%, 0.5%-13.5%) | 6/27 (22.2%, 8.6%-42.3%) |
| b) Micro-confirmed TB | >=10mm | 80/1193 ( 6.7%, 5.4%- 8.3%) | 271/1454 (18.6%, 16.7%-20.7%) |
| Contact female | >=10mm | 51/769 ( 6.6%, 5.0%- 8.6%) | 181/925 (19.6%, 17.1%-22.3%) |
| Contact male | >=10mm | 31/475 ( 6.5%, 4.5%- 9.1%) | 96/556 (17.3%, 14.2%-20.7%) |
| Contact HIV -ve | >=10mm | 67/1129 ( 5.9%, 4.6%- 7.5%) | 234/1285 (18.2%, 16.1%-20.4%) |
| Contact HIV +ve | >=10mm | 15/115 (13.0%, 7.5%-20.6%) | 43/196 (21.9%, 16.4%-28.4%) |
| Contact most of day | >=10mm | 24/405 ( 5.9%, 3.8%- 8.7%) | 122/663 (18.4%, 15.5%-21.6%) |
| Contact part of day | >=10mm | 40/611 ( 6.5%, 4.7%- 8.8%) | 133/701 (19.0%, 16.1%-22.1%) |
| Contact rarely | >=10mm | 18/228 ( 7.9%, 4.7%-12.2%) | 22/117 (18.8%, 12.2%-27.1%) |
| Shares bedroom | >=10mm | 72/1134 ( 6.3%, 5.0%- 7.9%) | 205/1162 (17.6%, 15.5%-20.0%) |
| Doesn't share bedroom | >=10mm | 10/110 ( 9.1%, 4.4%-16.1%) | 72/319 (22.6%, 18.1%-27.6%) |
| a) never smoked | >=10mm | 72/1147 ( 6.3%, 4.9%- 7.8%) | 244/1318 (18.5%, 16.5%-20.7%) |
| b) current smoker | >=10mm | 9/82 (11.0%, 5.1%-19.8%) | 28/134 (20.9%, 14.4%-28.8%) |
| c) previous smoker | >=10mm | 1/15 ( 6.7%, 0.2%-31.9%) | 5/29 (17.2%, 5.8%-35.8%) |
A graph of TST positivity by age of household contact
gdata_out2 <- gdata_out %>%
filter(var %in% c("Capricorn", "Mangaung")) %>%
mutate(var = factor(var, levels=c("Capricorn", "Mangaung"), labels=c("Capricorn", "Mangaung")),
tst = factor(tst, levels=c(">=5mm", ">=10mm"), labels=c(">=5mm", ">=10mm")))
gdata_out2 %>%
ggplot() +
geom_line(aes(y=estimate, x=agegroup_h, group=1)) +
geom_point(aes(y=estimate, x=agegroup_h, group=1)) +
geom_line(aes(y=conf.low, x=agegroup_h), group=1, linetype=4) +
geom_line(aes(y=conf.high, x=agegroup_h), group=1, linetype=4) +
facet_grid(tst~ var) +
theme_bw(base_family = "Open Sans Condensed Light") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(x="Age group of household contact (years)",
y="Percentage with TST induration (95% confidence interval)")Supplemental table 1: Compare characteristics of household contacts who did and did not receive TST.
s1 <- tstsa %>%
mutate(missing_tst = case_when(is.na(tstdiam_h) ~ "TST not done",
TRUE ~ "TST done"))
table_s1b <- tableby(missing_tst ~ site + sex_h + ageyears_h + bmi_h + relationship_h +
employment_h +
airspace_h + timespent_h + sleepsamebed_h + sharebedroom_h +
smoke_h + alcohol_h +
hivfinal_h + art_h + diabetes_h,
data=s1, test = FALSE,
numeric.stats=c("medianq1q3"),cat.stats = c("countrowpct"), digits=1)
summary(table_s1b)| TST done (N=2725) | TST not done (N=260) | Total (N=2985) | |
|---|---|---|---|
| site | |||
| Â Â Â Capricorn | 1244 (95.8%) | 54 (4.2%) | 1298 (100.0%) |
| Â Â Â Mangaung | 1481 (87.8%) | 206 (12.2%) | 1687 (100.0%) |
| sex_h | |||
| Â Â Â Female | 1694 (91.7%) | 154 (8.3%) | 1848 (100.0%) |
| Â Â Â Male | 1031 (90.7%) | 106 (9.3%) | 1137 (100.0%) |
| ageyears_h | |||
| Â Â Â Median (Q1, Q3) | 16.0 (8.0, 37.0) | 21.0 (4.0, 37.2) | 17.0 (8.0, 37.0) |
| bmi_h | |||
| Â Â Â Median (Q1, Q3) | 20.2 (16.6, 25.7) | 19.8 (16.4, 26.4) | 20.1 (16.6, 25.8) |
| relationship_h | |||
| Â Â Â Brother/sister | 433 (91.9%) | 38 (8.1%) | 471 (100.0%) |
| Â Â Â Child | 671 (91.5%) | 62 (8.5%) | 733 (100.0%) |
| Â Â Â Grandchild | 273 (89.8%) | 31 (10.2%) | 304 (100.0%) |
| Â Â Â Grandparent | 73 (96.1%) | 3 (3.9%) | 76 (100.0%) |
| Â Â Â Other | 743 (93.0%) | 56 (7.0%) | 799 (100.0%) |
| Â Â Â Parent/Parent-in-law | 318 (92.2%) | 27 (7.8%) | 345 (100.0%) |
| Â Â Â Spouse | 213 (83.2%) | 43 (16.8%) | 256 (100.0%) |
| employment_h | |||
| Â Â Â Agriculture | 5 (71.4%) | 2 (28.6%) | 7 (100.0%) |
| Â Â Â Construction/Manufacturing/Transport | 38 (86.4%) | 6 (13.6%) | 44 (100.0%) |
| Â Â Â Hospitality | 21 (91.3%) | 2 (8.7%) | 23 (100.0%) |
| Â Â Â Not employed | 2564 (91.7%) | 233 (8.3%) | 2797 (100.0%) |
| Â Â Â Other | 97 (85.1%) | 17 (14.9%) | 114 (100.0%) |
| airspace_h | |||
| Â Â Â No | 0 (0.0%) | 1 (100.0%) | 1 (100.0%) |
| Â Â Â Yes | 2724 (91.3%) | 259 (8.7%) | 2983 (100.0%) |
| timespent_h | |||
| Â Â Â a) every now and again | 345 (94.8%) | 19 (5.2%) | 364 (100.0%) |
| Â Â Â b) part of the day | 1310 (93.5%) | 91 (6.5%) | 1401 (100.0%) |
| Â Â Â c) most of the day | 1068 (87.8%) | 149 (12.2%) | 1217 (100.0%) |
| sleepsamebed_h | |||
| Â Â Â No | 2689 (91.2%) | 260 (8.8%) | 2949 (100.0%) |
| Â Â Â Yes | 36 (100.0%) | 0 (0.0%) | 36 (100.0%) |
| sharebedroom_h | |||
| Â Â Â No | 2296 (92.3%) | 191 (7.7%) | 2487 (100.0%) |
| Â Â Â Yes | 429 (86.1%) | 69 (13.9%) | 498 (100.0%) |
| smoke_h | |||
| Â Â Â a) never smoked | 2465 (91.4%) | 233 (8.6%) | 2698 (100.0%) |
| Â Â Â b) current smoker | 216 (89.3%) | 26 (10.7%) | 242 (100.0%) |
| Â Â Â c) previous smoker | 44 (97.8%) | 1 (2.2%) | 45 (100.0%) |
| alcohol_h | |||
| Â Â Â No | 2406 (91.3%) | 229 (8.7%) | 2635 (100.0%) |
| Â Â Â Yes | 319 (91.1%) | 31 (8.9%) | 350 (100.0%) |
| hivfinal_h | |||
| Â Â Â a) HIV negative | 2335 (94.0%) | 150 (6.0%) | 2485 (100.0%) |
| Â Â Â b) HIV positive | 293 (84.0%) | 56 (16.0%) | 349 (100.0%) |
| Â Â Â c) HIV unknown | 94 (63.5%) | 54 (36.5%) | 148 (100.0%) |
| art_h | |||
| Â Â Â a) Not taking ART | 35 (81.4%) | 8 (18.6%) | 43 (100.0%) |
| Â Â Â b) Taking ART | 207 (73.4%) | 75 (26.6%) | 282 (100.0%) |
| diabetes_h | |||
| Â Â Â No | 2681 (91.3%) | 255 (8.7%) | 2936 (100.0%) |
| Â Â Â Yes | 43 (93.5%) | 3 (6.5%) | 46 (100.0%) |
DAG was drawn at dagitty.net
Minimal sufficient adjustment sets for estimating the direct effect of Index case HIV status on Latent TB infection: - Index case age, - Province (site)
Fit a data-generating model including priors only to check priors are sensible.
prior <- c(
prior(normal(0, 2), class = Intercept),
prior(normal(0, 2), class = "b"),
prior(cauchy(0,1), sd)
)
#fit model
prior_fit <- brm(tst_5mm ~
#Main exposure
hiv_i +
#Index case characteristics
ageyears_i +
#Household characteristics
site +
#random intercepts for househod
(1 | record_id),
data=agegps,
family = bernoulli,
prior = prior,
seed = 13,
chains = 1,
sample_prior = "only",
control=list(adapt_delta=0.99))## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -D_REENTRANT -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -include stan/math/prim/mat/fun/Eigen.hpp -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -I/usr/local/include -fPIC -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
## ^
## ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
## ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
## Start sampling
##
## SAMPLING FOR MODEL 'f5ff6fdc8040aa30a1d89deb8cfd80b3' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000347 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 3.47 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 9.12827 seconds (Warm-up)
## Chain 1: 8.97351 seconds (Sampling)
## Chain 1: 18.1018 seconds (Total)
## Chain 1:
## Family: bernoulli
## Links: mu = logit
## Formula: tst_5mm ~ hiv_i + ageyears_i + site + (1 | record_id)
## Data: agegps (Number of observations: 2725)
## Samples: 1 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 1000
##
## Group-Level Effects:
## ~record_id (Number of levels: 877)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 4.36 17.26 0.03 24.24 1.00 1899 559
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -2.10 83.54 -168.99 171.07 1.00 3000 473
## hiv_iIndexHIVPve -0.02 2.13 -4.08 3.93 1.00 3000 710
## ageyears_i 0.06 2.20 -4.51 4.52 1.00 3000 441
## siteMangaung -0.01 1.96 -3.98 3.64 1.00 2567 523
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
prior_fe <- fixef(prior_fit, summary = TRUE, robust = FALSE, probs = c(0.025, 0.975)) %>%
as.data.frame() %>%
rownames_to_column() %>%
filter(rowname != "Intercept") %>%
select(rowname, Q2.5, Q97.5) %>%
mutate_at(vars(Q2.5, Q97.5), exp)
prior_fe %>%
gt()## Warning: `prepend()` is deprecated as of rlang 0.4.0.
##
## Vector tools are now out of scope for rlang to make it a more
## focused package.
## This warning is displayed once per session.
| Q2.5 | Q97.5 | |
|---|---|---|
| hiv_iIndexHIVPve | 0.01685077 | 51.05898 |
| ageyears_i | 0.01104263 | 91.68674 |
| siteMangaung | 0.01876772 | 38.05673 |
So these are pretty diffuse priors.
Model 1: estimating the proportion of household contacts with TST induration >=5mm.
#define priors
prior <- c(
prior(normal(0, 2), class = Intercept),
prior(normal(0, 2), class = "b"),
prior(cauchy(0,1), sd)
)
#fit model
fit1 <- brm(tst_5mm ~
#Main exposure
hiv_i +
#Index case characteristics
ageyears_i +
#Household characteristics
site +
#random intercepts for househod
(1 | record_id),
data=agegps,
family = bernoulli,
prior = prior,
seed = 13,
chains = 3,
control=list(adapt_delta=0.99))## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## recompiling to avoid crashing R session
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -D_REENTRANT -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -include stan/math/prim/mat/fun/Eigen.hpp -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -I/usr/local/include -fPIC -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
## ^
## ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
## ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
## Start sampling
##
## SAMPLING FOR MODEL 'f5ff6fdc8040aa30a1d89deb8cfd80b3' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000444 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 4.44 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 38.6236 seconds (Warm-up)
## Chain 1: 15.3151 seconds (Sampling)
## Chain 1: 53.9388 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'f5ff6fdc8040aa30a1d89deb8cfd80b3' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0.000203 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 2.03 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 39.4397 seconds (Warm-up)
## Chain 2: 15.7339 seconds (Sampling)
## Chain 2: 55.1736 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'f5ff6fdc8040aa30a1d89deb8cfd80b3' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 0.000206 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 2.06 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 39.3199 seconds (Warm-up)
## Chain 3: 15.3525 seconds (Sampling)
## Chain 3: 54.6724 seconds (Total)
## Chain 3:
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Family: bernoulli
## Links: mu = logit
## Formula: tst_5mm ~ hiv_i + ageyears_i + site + (1 | record_id)
## Data: agegps (Number of observations: 2725)
## Samples: 3 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 3000
##
## Group-Level Effects:
## ~record_id (Number of levels: 877)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.65 0.14 1.38 1.95 1.00 927 1375
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.98 0.30 -2.57 -1.39 1.00 1820 2007
## hiv_iIndexHIVPve -0.23 0.18 -0.59 0.13 1.00 2184 2272
## ageyears_i -0.02 0.01 -0.04 -0.01 1.00 2126 2320
## siteMangaung 1.13 0.19 0.76 1.52 1.00 1966 2288
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
Hypothesis: do household contacts of HIV-positive index cases have a lower probability of TST positivity?
## b_hiv_iIndexHIVPve
## 0.896
Look at model output
#Look at the posterior odds ratios
f1_graph_data <- posterior_samples(fit1)
f1_graph_data <- f1_graph_data %>%
select(starts_with("b_")) %>%
gather(key, value) %>%
group_by(key) %>%
median_qi(value, .width=0.95) %>%
mutate_at(vars(value, .lower, .upper), exp) %>%
mutate(model = "tst >=5mm")
f1_graph_data %>%
gt() %>%
fmt_number(
columns = vars(value, .lower, .upper),
decimals = 2,
use_seps = FALSE
)| key | value | .lower | .upper | .width | .point | .interval | model |
|---|---|---|---|---|---|---|---|
| b_ageyears_i | 0.98 | 0.96 | 0.99 | 0.95 | median | qi | tst >=5mm |
| b_hiv_iIndexHIVPve | 0.79 | 0.56 | 1.14 | 0.95 | median | qi | tst >=5mm |
| b_Intercept | 0.14 | 0.08 | 0.25 | 0.95 | median | qi | tst >=5mm |
| b_siteMangaung | 3.08 | 2.13 | 4.58 | 0.95 | median | qi | tst >=5mm |
# get the fitted estimates
nd <- agegps %>%
expand(hiv_i, site,
ageyears_i = modelr::seq_range(ageyears_i, n=100)
)
fitted_fit1 <- fitted(fit1, re_formula = NA, newdata = nd)
fitted_graph_data <- cbind(nd, fitted_fit1)
#Graph fitted estimates
f1_graph_a <- fitted_graph_data %>%
ggplot() +
geom_line(aes(y=Estimate, x=ageyears_i, group=1)) +
geom_line(aes(y=Q2.5, x=ageyears_i), group=1, linetype=4) +
geom_line(aes(y=Q97.5, x=ageyears_i), group=1, linetype=4) +
facet_grid(hiv_i~ site) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme_bw(base_family = "Open Sans Condensed Light") +
labs(x="",
y="",
title = paste("A) Percentage (95% credible interval) of household contacts with TST induration", "\u2265","5mm"))
f1_graph_aAlternative graph
fitted_fit1_alt <- fitted(fit1, re_formula = NA, newdata = nd, probs = c(0.025, 0.05, 0.1, 0.25, 0.33, 0.66, 0.75, 0.9, 0.95, 0.975))
fitted_graph_data_alt <- cbind(nd, fitted_fit1_alt)
#Graph fitted estimates
f1_graph_a_alt <- fitted_graph_data_alt %>%
ggplot() +
geom_ribbon(aes(ymin=Q2.5, ymax=Q97.5, x=ageyears_i), group=1, fill="#EDF8E9") +
geom_ribbon(aes(ymin=Q5, ymax=Q95, x=ageyears_i), group=1, fill="#BAE4B3") +
geom_ribbon(aes(ymin=Q10, ymax=Q90, x=ageyears_i), group=1, fill="#74C476") +
geom_ribbon(aes(ymin=Q25, ymax=Q75, x=ageyears_i), group=1, fill="#31A354") +
geom_ribbon(aes(ymin=Q33, ymax=Q66, x=ageyears_i), group=1, fill="#006D2C") +
geom_line(aes(y=Estimate, x=ageyears_i, group=1)) +
facet_grid(hiv_i~ site) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme_bw(base_family = "Open Sans Condensed Light") +
labs(x="",
y="",
title = paste("A) Mean percentage (uncertainty intervals) of household contacts with TST induration", "\u2265","5mm"))
f1_graph_a_alt#define priors
prior <- c(
prior(normal(0, 2), class = Intercept),
prior(normal(0, 2), class = "b"),
prior(cauchy(0,1), sd)
)
#fit model
fit2 <- brm(tst_10mm ~
#Main exposure
hiv_i +
#Index case characteristics
ageyears_i +
#Household characteristics
site +
#random intercepts for househod
(1 | record_id),
data=agegps,
family = bernoulli,
prior = prior,
seed = 13,
chains = 3,
control=list(adapt_delta=0.99))## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## recompiling to avoid crashing R session
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -D_REENTRANT -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -include stan/math/prim/mat/fun/Eigen.hpp -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -I/usr/local/include -fPIC -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
## ^
## ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
## ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
## Start sampling
##
## SAMPLING FOR MODEL 'f5ff6fdc8040aa30a1d89deb8cfd80b3' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000502 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 5.02 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 39.7565 seconds (Warm-up)
## Chain 1: 15.8605 seconds (Sampling)
## Chain 1: 55.617 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'f5ff6fdc8040aa30a1d89deb8cfd80b3' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0.000201 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 2.01 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 38.2153 seconds (Warm-up)
## Chain 2: 15.6285 seconds (Sampling)
## Chain 2: 53.8438 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'f5ff6fdc8040aa30a1d89deb8cfd80b3' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 0.000203 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 2.03 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 41.4861 seconds (Warm-up)
## Chain 3: 15.7983 seconds (Sampling)
## Chain 3: 57.2843 seconds (Total)
## Chain 3:
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Family: bernoulli
## Links: mu = logit
## Formula: tst_10mm ~ hiv_i + ageyears_i + site + (1 | record_id)
## Data: agegps (Number of observations: 2725)
## Samples: 3 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 3000
##
## Group-Level Effects:
## ~record_id (Number of levels: 877)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.63 0.15 1.34 1.92 1.00 873 1781
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -2.71 0.33 -3.39 -2.07 1.00 2249 2770
## hiv_iIndexHIVPve -0.27 0.19 -0.64 0.09 1.00 2651 2441
## ageyears_i -0.02 0.01 -0.03 -0.01 1.00 2554 2597
## siteMangaung 1.51 0.22 1.11 1.94 1.00 2324 2277
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
Model output
#Look at the posterior odds ratios
f2_graph_data <- posterior_samples(fit2)
f2_graph_data <- f2_graph_data %>%
select(starts_with("b_")) %>%
gather(key, value) %>%
group_by(key) %>%
median_qi(value, .width=0.95) %>%
mutate_at(vars(value, .lower, .upper), exp) %>%
mutate(model = "tst >=10mm")
f2_graph_data %>%
gt() %>%
fmt_number(
columns = vars(value, .lower, .upper),
decimals = 2,
use_seps = FALSE
)| key | value | .lower | .upper | .width | .point | .interval | model |
|---|---|---|---|---|---|---|---|
| b_ageyears_i | 0.98 | 0.97 | 0.99 | 0.95 | median | qi | tst >=10mm |
| b_hiv_iIndexHIVPve | 0.77 | 0.53 | 1.10 | 0.95 | median | qi | tst >=10mm |
| b_Intercept | 0.07 | 0.03 | 0.13 | 0.95 | median | qi | tst >=10mm |
| b_siteMangaung | 4.52 | 3.03 | 6.97 | 0.95 | median | qi | tst >=10mm |
# get the fitted estimates
nd <- agegps %>%
expand(hiv_i, site,
ageyears_i = modelr::seq_range(ageyears_i, n=100)
)
fitted_fit2 <- fitted(fit2, re_formula = NA, newdata = nd)
fitted_graph_data2 <- cbind(nd, fitted_fit2)
#Graph fitted estimates
f2_graph_a <- fitted_graph_data2 %>%
ggplot() +
geom_line(aes(y=Estimate, x=ageyears_i, group=1)) +
geom_line(aes(y=Q2.5, x=ageyears_i), group=1, linetype=4) +
geom_line(aes(y=Q97.5, x=ageyears_i), group=1, linetype=4) +
facet_grid(hiv_i~ site) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme_bw(base_family = "Open Sans Condensed Light") +
labs(x="Index case age (years)",
y="",
title = paste("B) Percentage (95% credible interval) of household contacts with TST induration", "\u2265","10mm"))
f2_graph_aHypothesis: do household contacts of HIV-positive index cases have a lower probability of TST positivity?
## b_hiv_iIndexHIVPve
## 0.922
fitted_fit2_alt <- fitted(fit2, re_formula = NA, newdata = nd, probs = c(0.025, 0.05, 0.1, 0.25, 0.33, 0.66, 0.75, 0.9, 0.95, 0.975))
fitted_graph_data_alt2 <- cbind(nd, fitted_fit2_alt)
#Graph fitted estimates
f2_graph_a_alt2 <- fitted_graph_data_alt2 %>%
ggplot() +
geom_ribbon(aes(ymin=Q2.5, ymax=Q97.5, x=ageyears_i), group=1, fill="#EFF3FF") +
geom_ribbon(aes(ymin=Q5, ymax=Q95, x=ageyears_i), group=1, fill="#BDD7E7") +
geom_ribbon(aes(ymin=Q10, ymax=Q90, x=ageyears_i), group=1, fill="#6BAED6") +
geom_ribbon(aes(ymin=Q25, ymax=Q75, x=ageyears_i), group=1, fill="#3182BD") +
geom_ribbon(aes(ymin=Q33, ymax=Q66, x=ageyears_i), group=1, fill="#08519C") +
geom_line(aes(y=Estimate, x=ageyears_i, group=1)) +
facet_grid(hiv_i~ site) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme_bw(base_family = "Open Sans Condensed Light") +
labs(x="Index case age (years)",
y="",
title = paste("B) Mean percentage (uncertainty intervals) of household contacts with TST induration", "\u2265","10mm"))
f2_graph_a_alt2This reproduction of the analysis was run by:
| keyName | value |
|---|---|
| sysname | Darwin |
| release | 19.3.0 |
| version | Darwin Kernel Version 19.3.0: Thu Jan 9 20:58:23 PST 2020; root:xnu-6153.81.5~1/RELEASE_X86_64 |
| nodename | petermacphersons-MacBook-Pro.local |
| machine | x86_64 |
| login | root |
| user | peter.macpherson |
| effective_user | peter.macpherson |
Analysis was run at 2020-02-26 13:01:47, and using the following Session Info:
R version 3.6.2 (2019-12-12)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Catalina 10.15.3
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] glue_1.3.1.9000 here_0.1 patchwork_1.0.0
[4] cowplot_1.0.0 janitor_1.2.1 gt_0.1.0
[7] Hmisc_4.3-1 Formula_1.2-3 survival_3.1-8
[10] lattice_0.20-38 tidybayes_2.0.1 brms_2.11.1
[13] Rcpp_1.0.3 arsenal_3.4.0 knitr_1.28
[16] pmthemes_0.0.0.9000 forcats_0.4.0 stringr_1.4.0
[19] dplyr_0.8.4 purrr_0.3.3 readr_1.3.1
[22] tidyr_1.0.2 tibble_2.1.3 ggplot2_3.2.1
[25] tidyverse_1.3.0
loaded via a namespace (and not attached):
[1] readxl_1.3.1 backports_1.1.5 plyr_1.8.5
[4] igraph_1.2.4.2 lazyeval_0.2.2 splines_3.6.2
[7] svUnit_0.7-12 crosstalk_1.0.0 rstantools_2.0.0
[10] inline_0.3.15 digest_0.6.24 htmltools_0.4.0
[13] rsconnect_0.8.16 fansi_0.4.1 magrittr_1.5
[16] checkmate_2.0.0 cluster_2.1.0 modelr_0.1.5
[19] matrixStats_0.55.0 xts_0.12-0 prettyunits_1.1.1
[22] jpeg_0.1-8.1 colorspace_1.4-1 rvest_0.3.5
[25] haven_2.2.0 xfun_0.12 callr_3.4.2
[28] crayon_1.3.4 jsonlite_1.6.1 zoo_1.8-7
[31] gtable_0.3.0 pkgbuild_1.0.6 rstan_2.19.3
[34] abind_1.4-5 scales_1.1.0 mvtnorm_1.0-12
[37] DBI_1.1.0 miniUI_0.1.1.1 xtable_1.8-4
[40] htmlTable_1.13.3 foreign_0.8-72 stats4_3.6.2
[43] StanHeaders_2.21.0-1 DT_0.12 htmlwidgets_1.5.1
[46] httr_1.4.1 threejs_0.3.3 arrayhelpers_1.1-0
[49] RColorBrewer_1.1-2 ellipsis_0.3.0 acepack_1.4.1
[52] farver_2.0.3 pkgconfig_2.0.3 loo_2.2.0
[55] nnet_7.3-12 sass_0.1.2.1 dbplyr_1.4.2
[58] utf8_1.1.4 labeling_0.3 tidyselect_1.0.0
[61] rlang_0.4.4 reshape2_1.4.3 later_1.0.0
[64] munsell_0.5.0 cellranger_1.1.0 tools_3.6.2
[67] cli_2.0.1 generics_0.0.2 broom_0.5.4
[70] ggridges_0.5.2 evaluate_0.14 fastmap_1.0.1
[73] yaml_2.2.1 processx_3.4.2 fs_1.3.1
[76] nlme_3.1-142 mime_0.9 xml2_1.2.2
[79] compiler_3.6.2 bayesplot_1.7.1 shinythemes_1.1.2
[82] rstudioapi_0.11 png_0.1-7 reprex_0.3.0
[85] stringi_1.4.6 highr_0.8 ps_1.3.2
[88] Brobdingnag_1.2-6 Matrix_1.2-18 commonmark_1.7
[91] markdown_1.1 shinyjs_1.1 vctrs_0.2.2
[94] pillar_1.4.3 lifecycle_0.1.0 bridgesampling_0.8-1
[97] data.table_1.12.8 httpuv_1.5.2 R6_2.4.1
[100] latticeExtra_0.6-29 promises_1.1.0 gridExtra_2.3
[103] codetools_0.2-16 colourpicker_1.0 gtools_3.8.1
[106] assertthat_0.2.1 rprojroot_1.3-2 withr_2.1.2
[109] shinystan_2.5.0 parallel_3.6.2 hms_0.5.3
[112] grid_3.6.2 rpart_4.1-15 coda_0.19-3
[115] snakecase_0.11.0 rmarkdown_2.1 shiny_1.4.0
[118] lubridate_1.7.4 base64enc_0.1-3 dygraphs_1.1.1.6